# import required modules
import warnings
warnings.filterwarnings('ignore')
import sys
stdout = sys.stdout
import pickle
from fdc import FDC
# from https://github.com/alexandreday/fast_density_clustering
import os
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
import pickle
import cv2 # OpenCV
from scipy import ndimage as ndi
from skimage.morphology import watershed
from skimage.feature import peak_local_max
from sklearn.manifold import TSNE
from sklearn.metrics import confusion_matrix
from matplotlib import colors
from matplotlib import cm
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from skimage.io import imsave
from skimage.io import imread
from read_roi import read_roi_file
from read_roi import read_roi_zip
# from https://github.com/hadim/read-roi
import seaborn.apionly as sns
sns.set_context("poster")
from IPython.display import display
# NOTE: this file is a notebook export -- the line below is IPython magic and
# only works inside Jupyter/IPython, not as a plain script.
%matplotlib inline
# embed fonts as editable text (TrueType / no path conversion) in exports
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
plt.rcParams['svg.fonttype'] = 'none'
# base directory holding the ISH images and UNet prediction stacks
DIR = '/Users/gu6/GoogleDrive/datascience/work/sec_final/scn'
# cluster name -> class index used in the UNet prediction channels (0 = background)
classes_for_UNet = {'C4':8,'C13':2,'C3':1,'C9':5,'C1':7,'C12':3,'C8':6,'C11':4,'C5':9,'C6':10}
# inverse mapping: class index -> cluster name
class_map_inv = {v: k for k, v in classes_for_UNet.items()}
# the 8 marker probes used by the classification model, shared across slides
modelprobes = ['trpm8','calca','nppb','tmem233','trpa1','fxyd2','mrgprd','s100b']
def binarize(preds):
    """Discretize class-probability maps into cleaned one-hot class layers.

    preds -- array of shape (height, width, N_CLASSES + 1) with per-pixel
             class probabilities; channel 0 is the background class.

    Returns a uint8 array of the same shape where each pixel is one-hot in
    the class with the highest probability, each non-background layer has
    been morphologically opened to remove speckle, and the background layer
    is redefined as every pixel not claimed by any other layer.
    """
    # Fix: take the channel count from the input shape instead of
    # `preds.max()+1` / a hardcoded range(1,11) -- the original dropped
    # trailing channels (and then raised IndexError in the opening loop)
    # whenever the highest class index was absent from the image.
    n_channels = preds.shape[-1]
    # per-pixel winning class, then one-hot encode with a fixed channel count
    winners = np.argmax(preds, axis=-1)
    preds = (np.arange(n_channels) == winners[..., None]).astype(np.uint8)
    # morphologically open all layers except the background layer (0)
    # to drop isolated noise pixels and smooth cell outlines
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (6, 6))
    for class_ in range(1, n_channels):
        preds[:, :, class_] = cv2.morphologyEx(preds[:, :, class_], cv2.MORPH_OPEN, kernel)
    # background = every pixel no other (opened) layer claims
    preds[:, :, 0] = np.max(preds[:, :, 1:], axis=-1) == 0
    return preds
def get_contours(pred_bin, kernel = np.ones((2,2))):
    """Segment individual cells from a discretized prediction stack.

    Runs a distance-transform watershed on every class layer (layers
    1..N; layer 0 is background) and converts each resulting label into
    an OpenCV contour.

    Returns (contours, classes): a list of OpenCV contours and a list of
    the matching class indices, in the same order.
    """
    all_contours = []
    all_classes = []
    # each class layer is segmented independently
    for cls in range(1, pred_bin.shape[-1]):
        layer = pred_bin[:, :, cls]
        # distance to the nearest background pixel
        dist = ndi.distance_transform_edt(layer)
        # local distance maxima act as watershed seeds (one per cell core)
        seeds = peak_local_max(dist, indices=False, footprint=np.ones((8, 8)),
                               labels=layer)
        # dilate slightly so the outline sits just outside each cell
        grown = cv2.dilate(layer, kernel, iterations=1)
        seed_labels = ndi.label(seeds)[0]
        segmented = watershed(-dist, seed_labels, mask=grown)
        # one contour per watershed label, remembering its class
        for lbl in range(1, segmented.max() + 1):
            _, found, _ = cv2.findContours((segmented == lbl).astype(np.uint8),
                                           cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            all_contours.append(found[0].astype(int))
            all_classes.append(cls)
    return all_contours, all_classes
def process(basenames,probes,slide):
    """Run the full per-slide pipeline.

    Loads the ISH image stacks and UNet predictions for each basename,
    discretizes and segments them, and computes per-cell mean channel
    intensities plus normalized and standardized variants. Also displays
    the discretized predictions and contour overlays as a side effect.

    basenames -- image basenames (without extension) under DIR
    probes    -- slide-specific probe names, appended after modelprobes
    slide     -- slide number, used to build unique cell ids

    Returns (ISHs, preds, binpreds, means, means_norm, means_stand);
    the last three are pandas DataFrames covering all cells of the slide.
    """
    preds = []
    ISHs = []
    # load image stacks and predictions; predictions are stored
    # channel-first on disk and moved to channel-last here
    for name in basenames:
        ISHs.append(imread(os.path.join(DIR,name + '.tif')))
        preds.append(np.moveaxis(imread(os.path.join(DIR,name + '_pred.tif')),0,-1))
    # discretize the probability maps into one-hot class layers
    binpreds = []
    for pred in preds:
        binpreds.append(binarize(pred))
    # display binary prediction
    for bin_pred in binpreds:
        colorpred = np.argmax(bin_pred,axis=-1)
        cmap = colors.ListedColormap(['white', 'red','green','blue','magenta','cyan','gold','purple','tan','sienna','darkorange'])
        sm = matplotlib.cm.ScalarMappable(cmap=cmap)
        rgb = (sm.to_rgba(colorpred)[:,:,:3]* 255).astype(np.uint8)
        plt.figure(figsize=(21,7))
        plt.imshow(rgb)
        plt.axis('off')
    # specify dilation kernel for the segmentation function
    kernel = np.array([[0,1,0],[1,1,1],[0,1,0]],dtype=np.uint8)
    # segment all 8 discretized predictions
    means_list = []
    stand_list = []
    norm_list = []
    for i,bin_pred in enumerate(binpreds):
        # get contours
        c, cc = get_contours(bin_pred,kernel)
        # display contours
        # NOTE(review): `cmap` below is the variable leaked from the display
        # loop above (Python for-loop variables persist) -- works but fragile.
        c_img = np.zeros(ISHs[i].shape[1:] + (3,))
        for class_ in range(len(classes_for_UNet)):
            for j in range(len(c)):
                if cc[j] == class_:
                    c_img = cv2.drawContours(c_img, c, j, cmap(class_)[:3] , thickness = 2)
        plt.figure(figsize=(20,20))
        plt.imshow(c_img)
        plt.axis('off')
        # map numeric class indices back to cluster names (e.g. 'C4')
        cc = [class_map_inv[x] for x in cc]
        #make a pandas frame with ids, basename,i,contours,means
        df = pd.DataFrame({'slide':slide,'img_number':i,'class':cc,'contour':c,'basename':basenames[i]})
        # unique cell id: "<slide>-<image>-<cell>"
        df['id']=[str(slide) + '-' + str(i)+'-'+str(x) for x in df.index.values]
        df.index = df.id
        #calculate background and means for all channels
        means = []
        means_stand = []
        means_norm = []
        img = ISHs[i]
        # background mask: starts all-ones, cells are punched out below
        bg_msk = np.ones(img.shape[1:],dtype = np.uint8)
        for j in range(len(c)):
            msk = np.zeros(img.shape[1:],dtype = np.uint8)
            msk = cv2.drawContours(msk, c, j, (1) , cv2.FILLED)
            bg_msk = cv2.drawContours(bg_msk, c, j, (0) , cv2.FILLED)
            # per-channel mean intensity inside this cell's mask
            means.append(np.sum(img * msk,axis=(1,2))/np.sum(msk))
        # per-channel background = 60% quantile of non-cell pixels;
        # channel 0 is dropped (presumably not a probe channel -- TODO confirm)
        bg = np.quantile((bg_msk * img).reshape(img.shape[0],-1),0.6,axis=1)[1:]
        means = pd.DataFrame(np.vstack(means)[:,1:])
        means.columns = modelprobes + probes
        means.index = df.id
        # background-subtracted, max-normalized expression per channel
        means_norm = (means-bg)/(means.max(axis=0)-bg)
        # z-scored expression per channel
        means_stand = pd.DataFrame(StandardScaler().fit_transform(means))
        means_stand.columns = modelprobes + probes
        means_stand.index = df.id
        # attach the cell metadata columns to each expression table
        means = pd.concat([df,means],axis=1)
        means_norm = pd.concat([df,means_norm],axis=1)
        means_stand = pd.concat([df,means_stand],axis=1)
        means_list.append(means)
        stand_list.append(means_stand)
        norm_list.append(means_norm)
    # stack the per-image tables into one table per slide
    means = pd.concat(means_list)
    means_norm = pd.concat(norm_list)
    means_stand = pd.concat(stand_list)
    return ISHs, preds, binpreds, means, means_norm, means_stand
# slide 1: four sections, probed for the sodium-channel genes listed below
slide1_basenames = ['20190607C57BL6DxClass-1-1-1','20190607C57BL6DxClass-1-4-1','20190607C57BL6DxClass-1-6-1','20190607C57BL6DxClass-1-7-1']
slide1_probes = ['scn10a','scn11a','scn1a','scn8a']
ISHs1, preds1, binpreds1, means1, means_norm1, means_stand1 = process(slide1_basenames,slide1_probes,slide=1)
# bare expression: notebook display of the table shape
means1.shape
#clusterlist = ['C1','C3','C4','C5','C6','C8','C9','C11','C12','C13']
probes = ['scn10a','scn11a','scn1a','scn8a']
# violin plots of standardized expression per cluster, one axis per probe
fig, axes = plt.subplots(ncols=1, nrows=len(probes), figsize=(21,60))
for i,probe in enumerate(probes):
    sns.violinplot(means_stand1['class'], means_stand1.loc[:,probe], ax=axes[i])
    axes[i].set_title(probe)
    #axes[i].set_xticklabels(clusterlist)
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
# Per-cluster histograms of standardized scn10a expression:
# grey = all cells of slide 1, black = cells of the given cluster.
# (Replaces eight copy-pasted identical plotting cells with one loop;
# the calls and arguments are unchanged.)
for cluster in ['C3', 'C4', 'C5', 'C6', 'C8', 'C9', 'C13']:
    plt.figure(figsize=[20, 15])
    plt.subplot(3, 1, 1)
    plt.hist(means_stand1.loc[:, 'scn10a'], bins=50, range=[-1.5, 3.5],
             density=True, label='all', color='grey')
    plt.hist(means_stand1.loc[means_stand1['class'] == cluster, 'scn10a'],
             bins=50, range=[-1.5, 3.5], density=True, label=cluster, color='black')
    plt.title(cluster)
    plt.ylabel('relative cell number')
    plt.xlabel('normalized scn10a expression level')
    sns.despine()
# overlay the slide-2 segmentation contours on the aligned 40x scn5a images
# NOTE(review): this cell references `means2` and `slide2_basenames`, which
# are only defined further down in this file -- the notebook was evidently
# executed out of file order; confirm execution order before running as a script.
scn5a_names = ['20190611C57BL6DxClass-2-41-3-scn5a40x-aligned','20190611C57BL6DxClass-2-42-3-scn5a40x-aligned','20190611C57BL6DxClass-2-43-3-scn5a40x-aligned']
for i,scn5a_name in enumerate(scn5a_names):
    cmap = colors.ListedColormap(['white', 'red','green','blue','magenta','cyan','gold','purple','tan','sienna','darkorange'])
    # get the right contours
    df = means2[means2.basename == slide2_basenames[i+1]]
    c = df['contour']
    # cluster names -> numeric class indices
    cc = df['class'].replace(classes_for_UNet)
    # get the image
    c_img = cv2.cvtColor(imread(os.path.join(DIR,scn5a_name+'.tif')),cv2.COLOR_GRAY2RGB)
    # NOTE(review): bare expression -- only displays in a notebook, no effect here
    c_img
    # NOTE(review): `cc[j]` uses integer position on a string-indexed Series --
    # relies on pandas positional fallback; verify on the pandas version in use.
    for class_ in range(len(classes_for_UNet)):
        for j in range(len(c)):
            if cc[j] == class_:
                c_img = cv2.drawContours(c_img, c, j, [x * 255 for x in cmap(class_)[:3] ], thickness = 1)
    plt.figure(figsize=(20,20))
    plt.imshow(c_img)
    plt.axis('off')
# quick shape check of the first aligned scn5a image (notebook display)
img = cv2.cvtColor(imread(os.path.join(DIR,scn5a_names[0]+'.tif')),cv2.COLOR_GRAY2RGB)
img.shape
# slide 2: four sections, probed for the genes listed below
slide2_basenames = ['20190607C57BL6DxClass-2-6-1','20190607C57BL6DxClass-2-9-1','20190607C57BL6DxClass-2-10-1','20190607C57BL6DxClass-2-11-1']
slide2_probes = ['scn10a','scn5a','scn7a','scn9a']
ISHs2, preds2, binpreds2, means2, means_norm2, means_stand2 = process(slide2_basenames,slide2_probes,slide=2)
#clusterlist = ['C1','C3','C4','C5','C6','C8','C9','C11','C12','C13']
probes = ['scn10a','scn5a','scn7a','scn9a']
# violin plots of standardized expression per cluster, one axis per probe
fig, axes = plt.subplots(ncols=1, nrows=len(probes), figsize=(21,60))
for i,probe in enumerate(probes):
    sns.violinplot(means_stand2['class'], means_stand2.loc[:,probe], ax=axes[i])
    axes[i].set_title(probe)
    #axes[i].set_xticklabels(clusterlist)
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
# slide 3: four sections, probed for the genes listed below
slide3_basenames = ['20190607C57BL6DxClass-3-4-1','20190607C57BL6DxClass-3-5-1','20190607C57BL6DxClass-3-6-1','20190607C57BL6DxClass-3-7-1']
slide3_probes = ['htr3a','scn1a','tac1','trpv1']
ISHs3, preds3, binpreds3, means3, means_norm3, means_stand3 = process(slide3_basenames,slide3_probes,slide=3)
# combine the standardized tables of all three slides and embed all cells
# with tSNE using only the 8 model probes shared across slides
stand_all = pd.concat([means_stand1,means_stand2,means_stand3],axis=0)
tSNE_all = TSNE(n_components=2,random_state=1234).fit_transform(stand_all.loc[:,modelprobes])
stand_all['tSNE1'] = tSNE_all[:,0]
stand_all['tSNE2'] = tSNE_all[:,1]
# persist the combined table with the embedding coordinates
with open(os.path.join(DIR,'stand_all.pickle'), "wb") as output_file:
    pickle.dump(stand_all, output_file)
#function for plotting expression levels of individual genes in tSNE projection
# function to plot tSNE
# scatter the tSNE embedding, coloured and labelled by cluster
labels = stand_all['class']
clusters = list(set(labels))
# NOTE(review): this local `cm` shadows the `from matplotlib import cm` import above
cm = plt.get_cmap('tab20')
plt.figure(figsize = [15,12])
ax = plt.gca()
for i, c in enumerate(clusters):
    idx = labels == c
    plt.scatter(stand_all.loc[idx,'tSNE1'],stand_all.loc[idx,'tSNE2'],c=cm(i),label=c,s=50)
plt.legend(bbox_to_anchor=(1.0, 0.5),loc=3);
# notebook display: total cell count and size of cluster C9
stand_all.shape[0]
np.sum(stand_all['class'] == 'C9')
#function for plotting expression levels of individual genes in tSNE projection
def plot_gene_tSNE (df,probe,clipping=(0,1)):
    """Scatter the tSNE embedding coloured by one gene's (clipped) expression.

    df       -- table with 'tSNE1'/'tSNE2' columns and one column per probe
    probe    -- name of the expression column to colour by
    clipping -- (low, high) bounds applied to the colour values
    """
    plt.set_cmap("inferno")
    # drop cells without a measurement for this probe
    valid = df[df[probe].notnull()]
    lo, hi = clipping
    colour_vals = np.clip(valid[probe], lo, hi)
    plt.scatter(valid.tSNE1, valid.tSNE2, c=colour_vals, s=5)
    plt.title(probe)
    # dark grey background so low-expression (dark) points stay visible
    plt.gca().set_facecolor([60/256, 60/256, 60/256, 1])
# single-probe example panel
plot_gene_tSNE(stand_all,'fxyd2',clipping=(-1.5,3.5))
# plot expression tSNE
plt.set_cmap("inferno")
plt.figure(figsize = [20,20]);
probes = ['scn1a','scn5a','scn7a','scn8a','scn9a','scn10a','scn11a','htr3a','tac1']
# 3x3 grid: one tSNE expression panel per probe
for i,probe in enumerate(probes):
    plt.subplot(3,3,i+1);
    plot_gene_tSNE(stand_all,probe,clipping=(-1,3.5))
# save figure if desired
# plt.savefig('allprobes-tSNE-2.png')
def get_9roi_pic(levels,contours,img,classes,size = 100,n=9,top=True):
    """Assemble an n-panel (default 3x3) montage of ROI crops, with the
    filled ROI shape in the red channel and a reference ISH channel in green.

    NOTE(review): this looks like an unfinished notebook scratch cell --
    the parameters `levels`, `contours`, `img` and `classes` are never used;
    the body reads `df`, `slide1_means_norm` and `ref_channel` from the
    enclosing (global) scope, none of which are defined in this file; and
    the `idx` computed below is never used. The caller further down also
    passes a `ref_channel` keyword that is not in this signature. Confirm
    intent before using.
    """
    # pick the 9 highest- or lowest-level cells (result currently unused)
    if top:
        idx = pd.Series(slide1_means_norm[:,9]).sort_values().tail(9).index.values
    else:
        idx = pd.Series(slide1_means_norm[:,9]).sort_values().head(9).index.values
    # grid layout: roughly square
    n_cols = int(np.ceil(np.sqrt(n)))
    n_rows = int(np.ceil(n/n_cols))
    # cx = m.m10 / m.m00; cy = m.m01 / m.m00;
    # new rgb image
    new_rgb = np.zeros([size*n_rows,size*n_cols,3])
    # for each line in df
    for i,(roi, row) in enumerate(df.iterrows()):
        # load image,
        ish_stack = imread(os.path.join(DIR,'images',row.filename + '-adj.tif'))
        w = ish_stack.shape[2]
        h = ish_stack.shape[1]
        # blank blue channel for cell shape
        tmp_red = np.zeros([h,w])
        # load all rois from zip
        rois = read_roi_zip(os.path.join(DIR,'rois',row.filename + '-rois.zip'))
        # rasterize the ROI into tmp_red and find its center, per ROI type
        if rois[roi]['type'] == 'traced' or rois[roi]['type'] == 'freehand':
            xs = rois[roi]['x']
            ys = rois[roi]['y']
            pts = np.array(list(zip(xs,ys))).reshape((-1,1,2))
            center_x = int((np.min(xs) + np.max(xs))/2)
            center_y = int((np.min(ys) + np.max(ys))/2)
            cv2.fillPoly(tmp_red,[pts],True,(255))
        elif rois[roi]['type'] == 'rectangle' :
            x1 = rois[roi]['left']
            y1 = rois[roi]['top']
            x2 = x1 + rois[roi]['width']
            y2 = y1 + rois[roi]['height']
            cv2.rectangle(tmp_red,(x1,y1),(x2,y2),(255),cv2.FILLED)
            center_x = int((x1+x2)/2)
            center_y = int((y1+y2)/2)
        elif rois[roi]['type'] == 'oval':
            x1 = rois[roi]['left']
            y1 = rois[roi]['top']
            width = rois[roi]['width']
            height = rois[roi]['height']
            center_x = x1 + int(round(width/2))
            center_y = y1 + int(round(height/2))
            cv2.ellipse(tmp_red,(center_x,center_y), (width,height), 0, 0, 180, (255), cv2.FILLED)
        else:
            print('error: ',rois[roi])
        # crop a size x size window centered on the ROI, clamped to the image
        x1 = min(max(int(center_x - size/2),0),w-size)
        x2 = x1 + size
        y1 = min(max(int(center_y - size/2),0),h-size)
        y2 = y1 + size
        tmp_red = tmp_red[y1:y2,x1:x2] * 255
        # place the crop into its grid cell
        row = int(i/n_cols)
        col = int(i%n_cols)
        new_rgb[(row*size):((row+1)*size),(col*size):((col+1)*size),0] = tmp_red
        new_rgb[(row*size):((row+1)*size),(col*size):((col+1)*size),1] = ish_stack[ref_channel,y1:y2,x1:x2]
    new_rgb = new_rgb.astype(np.uint8)
    plt.figure(figsize=[20,20])
    plt.imshow(new_rgb)
    return new_rgb
# NOTE(review): this call does not match get_9roi_pic's signature (a single
# positional argument plus a `ref_channel` keyword that does not exist) and
# references `results` / `raw_piezo`, which are not defined in this file --
# leftover from another notebook; will raise as written.
rgb = get_9roi_pic(results.loc[raw_piezo[raw_piezo.cluster == 'C1'].sort_values('level').tail(10).index[:-1],['filename']],ref_channel=13)
# quick visual check: green = one ISH channel, red = filled cell contours
# NOTE(review): `slide2_ISHs` and `contours` are not defined under these
# names at this point (process() returned ISHs2; contours is defined further
# down) -- again relies on out-of-order notebook execution.
rgb = np.zeros(slide2_ISHs[0].shape[1:] + (3,),dtype=np.uint8)
rgb[:,:,1] = slide2_ISHs[0][12]
green = np.zeros(slide2_ISHs[0].shape[1:])
for j in range(len(contours[0])):
    red = cv2.drawContours(green, contours[0], j, (255) , cv2.FILLED)
rgb[:,:,0] = red
plt.imshow(rgb)
imsave('scn9a-test.tif',rgb)
# re-load a single slide-3 image and its prediction stack for the
# step-by-step walkthrough below
slide3_preds = []
slide3_ISHs = []
slide3_preds.append(np.moveaxis(imread(os.path.join(DIR,'20190607C57BL6DxClass-3-7-1_pred.tif')),0,-1))
slide3_ISHs.append(imread(os.path.join(DIR,'20190607C57BL6DxClass-3-7-1.tif')))
probes = ['htr3a','scn1a','tac1','trpv1']
def binarize(preds):
    """Discretize class-probability maps into cleaned one-hot class layers.

    (Duplicate re-definition of the earlier binarize -- the notebook repeats
    it for the walkthrough section; kept for cell-by-cell reproducibility.)

    preds -- array of shape (height, width, N_CLASSES + 1) with per-pixel
             class probabilities; channel 0 is the background class.

    Returns a uint8 array of the same shape: one-hot winning class per pixel,
    morphologically opened non-background layers, and a background layer
    redefined as everything not claimed by any other layer.
    """
    # Fix: derive the channel count from the input shape instead of
    # `preds.max()+1` / a hardcoded range(1,11) -- the original dropped
    # trailing channels (then raised IndexError in the opening loop)
    # whenever the highest class index was absent from the image.
    n_channels = preds.shape[-1]
    winners = np.argmax(preds, axis=-1)
    preds = (np.arange(n_channels) == winners[..., None]).astype(np.uint8)
    # morphologically open all layers except the background layer (0)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (6, 6))
    for class_ in range(1, n_channels):
        preds[:, :, class_] = cv2.morphologyEx(preds[:, :, class_], cv2.MORPH_OPEN, kernel)
    # background = every pixel no other (opened) layer claims
    preds[:, :, 0] = np.max(preds[:, :, 1:], axis=-1) == 0
    return preds
# we make a list of binary predictions and fill it with the discretized 8 channel
# predictions
slide3_binpreds = []
for pred in slide3_preds:
    slide3_binpreds.append(binarize(pred))
#display colormaps for the discretized predictions
for i,bin_pred in enumerate(slide3_binpreds):
    # collapse one-hot layers back to a per-pixel class index for display
    colorpred = np.argmax(bin_pred,axis=-1)
    cmap = colors.ListedColormap(['white', 'red','green','blue','magenta','cyan','gold','purple','tan','sienna','darkorange'])
    sm = matplotlib.cm.ScalarMappable(cmap=cmap)
    rgb = (sm.to_rgba(colorpred)[:,:,:3]* 255).astype(np.uint8)
    plt.figure(figsize=(21,7))
    plt.imshow(rgb)
    plt.axis('off')
    #save figure if desired
    #imsave(os.path.join(DIR,'images',IMG_NAMES[i] + '-predcolor.tif'),rgb)
# In order to segment out individual cells from blobs of partially overlapping cells we perform a distance transform watershed segmentation and store the individual ROIs as OpenCV contours.
def get_contours(pred_bin, kernel = np.ones((2,2))):
    """Segment individual cells from a discretized prediction stack.

    (Duplicate re-definition of the earlier get_contours, repeated by the
    notebook for the walkthrough section.)

    Applies a distance-transform watershed to every class layer (1..N;
    layer 0 is background) and converts each resulting label to an
    OpenCV contour.

    Returns (contours, classes): the OpenCV contours and their matching
    class indices, in the same order.
    """
    found_contours, found_classes = [], []
    for cls in range(1, pred_bin.shape[-1]):
        layer = pred_bin[:, :, cls]
        # distance to the nearest background pixel
        dist = ndi.distance_transform_edt(layer)
        # local maxima of the distance map seed the watershed (one per cell core)
        seed_mask = peak_local_max(dist, indices=False, footprint=np.ones((8, 8)),
                                   labels=layer)
        # slight dilation puts the outline just outside each cell
        mask = cv2.dilate(layer, kernel, iterations=1)
        seeds = ndi.label(seed_mask)[0]
        segmented = watershed(-dist, seeds, mask=mask)
        # one contour per watershed label, tagged with its class
        for lbl in range(1, segmented.max() + 1):
            _, cnts, _ = cv2.findContours((segmented == lbl).astype(np.uint8),
                                          cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            found_contours.append(cnts[0].astype(int))
            found_classes.append(cls)
    return found_contours, found_classes
# this is a list of lists (per image) of contours (per cell)
contours = []
# and a corresponding list of lists (per image) of cell class (per cell)
contour_classes = []
# specify dilation kernel for the segmentation function
kernel = np.array([[0,1,0],[1,1,1],[0,1,0]],dtype=np.uint8)
# segment all 8 discretized predictions
for bin_pred8 in slide3_binpreds:
    c, cc = get_contours(bin_pred8,kernel)
    contours.append(c)
    contour_classes.append(cc)
# draw the segmented contours of each image, colour-coded by class
cmap = colors.ListedColormap(['white', 'red','green','blue','magenta','cyan','gold','purple','tan','sienna','darkorange'])
for i,img_contours in enumerate(contours):
    # NOTE(review): `slide1_ISHs` is not defined in this file (the slide-1
    # images live in ISHs1); since these contours come from slide 3, this
    # presumably should be `slide3_ISHs` -- confirm before running.
    img = np.zeros(slide1_ISHs[i].shape[1:] + (3,))
    for class_ in range(len(classes_for_UNet)):
        for j in range(len(img_contours)):
            if contour_classes[i][j] == class_:
                img = cv2.drawContours(img, img_contours, j, cmap(class_)[:3] , thickness = 2)
    plt.figure(figsize=(20,20))
    print(img.shape)
    plt.imshow(img)
    plt.axis('off')
# get all channels
# compute per-cell channel means and a per-channel background estimate
# for each slide-3 image
slide3_means = []
slide3_bgs = []
for i,img in enumerate(slide3_ISHs):
    #img = img[9:]
    means = []
    # background mask: starts all-ones, cells are punched out below
    bg_msk = np.ones(img.shape[1:],dtype = np.uint8)
    for j in range(len(contours[i])):
        msk = np.zeros(img.shape[1:],dtype = np.uint8)
        msk = cv2.drawContours(msk, contours[i], j, (1) , cv2.FILLED)
        bg_msk = cv2.drawContours(bg_msk, contours[i], j, (0) , cv2.FILLED)
        # per-channel mean intensity inside this cell's mask
        means.append(np.sum(img * msk,axis=(1,2))/np.sum(msk))
    bg = (bg_msk * img).reshape(img.shape[0],-1)
    slide3_means.append(np.vstack(means))
    # background = 60% quantile of the non-cell pixels, per channel
    slide3_bgs.append(np.quantile(bg,0.6,axis=1))
#normalize and standardize each img
#normalizing should be done between max value for a cell and the background
slide3_means_norm = []
slide3_means_stand = []
for i,means in enumerate(slide3_means):
    # background-subtracted, max-normalized expression
    slide3_means_norm.append((means-slide3_bgs[i])/(means.max(axis=0)-slide3_bgs[i]))
    # z-scored expression
    slide3_means_stand.append(StandardScaler().fit_transform(means))
slide3_means_norm = np.vstack(slide3_means_norm)
slide3_means_stand = np.vstack(slide3_means_stand)
# flatten the per-image class lists and translate indices to cluster names
contour_classes_all_3 = [item for sublist in contour_classes for item in sublist]
contour_classes_all_3 = [class_map_inv[x] for x in contour_classes_all_3]
# NOTE(review): `slide1_means_stand` / `slide2_means_stand` and
# `contour_classes_all_1` / `_2` are not defined anywhere in this file --
# presumably produced by earlier (removed) notebook cells; confirm before
# running this section.
means_stand_all = np.vstack([slide1_means_stand,slide2_means_stand,slide3_means_stand])
classes_all = [item for sublist in [contour_classes_all_1,contour_classes_all_2,contour_classes_all_3] for item in sublist]
# embed on the 8 model-probe channels (columns 1..8 of the stacked array)
tSNE_all = TSNE(n_components=2,random_state=1234).fit_transform(means_stand_all[:,1:9])
# function to plot tSNE
def plot_tsne(bg_sub_tsne,labels,clusters = None):
    """Scatter a 2D tSNE embedding coloured and labelled by cluster.

    bg_sub_tsne -- array of shape (n_cells, 2) with the embedding coordinates
    labels      -- per-cell cluster label, any sequence convertible to a Series
    clusters    -- optional explicit ordering/subset of clusters to plot;
                   defaults to the distinct labels (the legacy sentinel
                   value [None] is still accepted for backward compatibility)

    Fix: the default was the mutable list `[None]`; replaced with the
    conventional None sentinel. Also removed the unused `ax` local and
    renamed the colormap variable so it no longer shadows the module-level
    `matplotlib.cm` import.
    """
    labels = pd.Series(labels)
    if clusters is None or clusters == [None]:
        clusters = list(set(labels))
    palette = plt.get_cmap('tab20')
    plt.figure(figsize = [15,12])
    for i, cluster in enumerate(clusters):
        # boolean mask of the cells belonging to this cluster
        idx = labels == cluster
        plt.scatter(bg_sub_tsne[idx,0],bg_sub_tsne[idx,1],c=palette(i),label=cluster,s=50)
    plt.legend(bbox_to_anchor=(1.0, 0.5),loc=3)
# tSNE of all slides coloured by cluster
plot_tsne(tSNE_all,classes_all)
#clusterlist = ['C1','C3','C4','C5','C6','C8','C9','C11','C12','C13']
# violin plots of normalized expression of the four slide-3 probes per cluster
# (probe channels start at column 9 of the stacked means array)
fig, axes = plt.subplots(ncols=1, nrows=len(probes), figsize=(21,60))
for i in range(len(probes)):
    sns.violinplot(contour_classes_all_3, slide3_means_norm[:,i+9], ax=axes[i])
    axes[i].set_title(probes[i])
    #axes[i].set_xticklabels(clusterlist)
fig.tight_layout(rect=[0, 0.03, 1, 0.95])